Visualizing Chicago’s rat infestation and sanitation problems


# Libraries
library(lubridate)
library(tidyverse)
library(dplyr)
library(gghighlight)
library(ggrepel)
library(ggthemes)
library(readxl)
library(scales)
library(statar)
library(stringr)
library(tidyr)
library(here)
library(httr)
library(jsonlite)
library(magrittr)
library(grid)
library(ggmap)
library(sf)
library(cowplot)
library(ggplotify)

Emerging Rat Concerns…

In recent years, Chicago has been receiving an increasing number of rodent infestation complaints. It is important for the city government to actively engage in resolving rat complaints, as rats can cause severe property damage and spread harmful pathogens to humans. This exploratory visualization project illustrates the severity and patterns of rat complaints within the city of Chicago, and seeks to help policymakers find effective measures to mitigate rodent problems.

# Reading in Data
# Lookup tables: census tract -> community area, and community number -> name
census_tracts <- read_csv(here("data", "tract_community.csv"))
community_numbers <- read_excel(here("data", "community_numbers.xlsx"))

# Chicago rodent complaints
chicago_rodents <- read_csv(here("data", "Chicago_311_Rodent_2014-2018.csv"))

# Complaint extracts for the comparison cities
boston_311 <- read_csv(here("data", "Boston_311.csv"))
dc_311_2018 <- read_csv(here("data", "DC_Service_Requests_2018.csv"))
nyc_rodents_2018 <- read_csv(here("data", "NYC_311_Rodent_2018.csv"))
la_rodents_2018 <- read_csv(here("data", "LA_Dead_Animal_Removal_2018.csv"))
detroit_rodents_2018 <- read_csv(here("data", "Detroit_Rodent_2018.csv"))

# Chicago sanitation violations and vacant-property counts
chicago_sanitation_2018 <-
  read_csv(here("data", "Chicago_311_Sanitation_Violations_2018.csv"))
chicago_vacant_property <-
  read_excel(here("data", "Chicago_vacant_properties.xlsx"))

# Scraping Data from ACS
# Build the request URL from its parts: endpoint, requested variables,
# geography (every tract in state 17) and the API key.
base <- "https://api.census.gov/data/2016/acs/acs5?"
acs_vars <- "get=B01003_001E,B07013_001E,B07013_003E,B19301_001E"
state_tracts <- "&for=tract:*&in=state:17"
# The key is the X1 column of data/ACSAuth.txt, parsed with "/" as delimiter.
ACS_TOKEN <-
  read_delim(here("data", "ACSAuth.txt"),
             delim = "/",
             col_names = FALSE)$X1
key <- paste0("&key=", ACS_TOKEN)

url <- paste0(base, acs_vars, state_tracts, key)
response <- GET(url)
# Fail here with the HTTP status instead of later inside the JSON parser
# (e.g. on an invalid key or an API outage).
stop_for_status(response)
all_il_tracts <-
  jsonlite::fromJSON(content(response, as = "text", encoding = "UTF-8")) %>%
  data.frame() %>%
  # Drop the first row; the Census API is understood to return the column
  # names there (the columns are renamed explicitly below).
  slice(-1)

# Names follow the order of acs_vars, then the three geography columns.
colnames(all_il_tracts) <-
  c("Population",
    "Total_Living_In_Area",
    "Renter_Count",
    "Income_Per_Capita",
    "State",
    "County",
    "Tract")

Understand Rat Problems through Graphs

# Preparing Data
# Attach the community name to every census tract.
census_tracts <- left_join(census_tracts,
                           community_numbers,
                           by = c("Community Area" = "Number"))

# Chicago complaints split by the calendar year of their creation date.
chicago_rodents_2018 <-
  filter(chicago_rodents, year(mdy(`Creation Date`)) == 2018)
chicago_rodents_2017 <-
  filter(chicago_rodents, year(mdy(`Creation Date`)) == 2017)
chicago_rodents_2016 <-
  filter(chicago_rodents, year(mdy(`Creation Date`)) == 2016)
chicago_rodents_2015 <-
  filter(chicago_rodents, year(mdy(`Creation Date`)) == 2015)
chicago_rodents_2014 <-
  filter(chicago_rodents, year(mdy(`Creation Date`)) == 2014)

# Boston and DC ship all request types, so keep only the rodent ones.
boston_rodents_2018 <-
  filter(boston_311,
         str_detect(TYPE, "Rodent"),
         year(open_dt) == 2018)
dc_rodents_2018 <-
  filter(dc_311_2018, str_detect(SERVICECODEDESCRIPTION, "Rodent"))

# Number of 2017 complaints per named community.
chicago_rat_community_17 <-
  chicago_rodents_2017 %>%
  group_by(`Community Area`) %>%
  summarise(`Number of Complaints` = n()) %>%
  inner_join(community_numbers, by = c("Community Area" = "Number")) %>%
  select(Community, `Number of Complaints`)

# Convert a column to numeric by value.
#
# column: a factor, character, or numeric vector.
# Returns a numeric vector of the same length.
#
# Factors are converted through their labels rather than their integer
# codes. Character and numeric input is handed to as.numeric() directly;
# indexing the (NULL) levels of a non-factor would silently turn every
# value into NA, which is what happens on R >= 4.0 where data.frame() no
# longer creates factors from strings.
to_numeric <-
  function(column) {
    if (is.factor(column)) {
      return(as.numeric(levels(column))[column])
    }
    as.numeric(column)
  }

# Tract-level ACS figures for the tracts listed in census_tracts, rolled up
# to one row per community.
Chicago_communities <-
  all_il_tracts %>%
  mutate(
    Census_Tract = str_c(State, County, Tract),
    # Income per capita times population gives a total that can be summed
    # across tracts and divided by population again afterwards.
    Total_Income = to_numeric(Income_Per_Capita) * to_numeric(Population)
  ) %>%
  select(Census_Tract,
         Population,
         Total_Income,
         Total_Living_In_Area,
         Renter_Count) %>%
  mutate(Census_Tract = as.numeric(Census_Tract)) %>%
  inner_join(census_tracts, by = c("Census_Tract" = "Census Tract")) %>%
  mutate_at(vars(-"Community", -"Total_Income"), .funs = "to_numeric") %>%
  select(-Census_Tract, -`Community Area`) %>%
  group_by(Community) %>%
  summarise_all(.funs = sum) %>%
  mutate(
    `Income Per Capita` = round(Total_Income / Population, 0),
    `Renter Proportion` = Renter_Count / Total_Living_In_Area
  ) %>%
  select(Community, Population, `Renter Proportion`, `Income Per Capita`)

# Add the community figures to the 2017 complaint counts and derive the
# complaint rate per 10000 residents.
chicago_rat_community_17 <-
  Chicago_communities %>%
  inner_join(chicago_rat_community_17, by = "Community") %>%
  mutate(
    `Number of Requests per 10000 People` =
      `Number of Complaints` / Population * 10000
  )

# Plot Custom Theme
# Shared look for the charts: linen backgrounds, bold axis text, a black
# x-axis line and dotted horizontal major grid lines only.
custom_theme <- theme(
  # Text
  plot.title = element_text(size = 16, face = "bold"),
  plot.subtitle = element_text(size = 14),
  plot.caption = element_text(size = 10),
  axis.title = element_text(size = 13, face = "bold"),
  axis.text = element_text(size = 12, face = "bold"),
  # Spacing and axis line
  plot.margin = margin(t = 30, r = 30, b = 30, l = 30, unit = "pt"),
  axis.line.x = element_line(color = "black", size = 0.7),
  # Grid and panel border
  panel.grid.major.x = element_blank(),
  panel.grid.major.y = element_line(size = 0.6,
                                    color = "black",
                                    linetype = "dotted"),
  panel.grid.minor.x = element_blank(),
  panel.grid.minor.y = element_blank(),
  panel.border = element_blank(),
  # Backgrounds
  legend.background = element_rect(fill = "linen"),
  legend.key = element_rect(fill = NA, color = NA),
  panel.background = element_rect(fill = "linen"),
  plot.background = element_rect(fill = "linen")
)

“Rat Capital” of United States?

In 2018, Chicago saw a total of 37,577 rat complaints. Compared with other major US cities such as New York and Los Angeles, and with cities historically troubled by rodent infestations such as Detroit, Chicago not only had the highest total complaint count but also had the largest number of complaints per 10,000 residents all year round. The graph below also shows that rat problems were especially prevalent in the summer.

# Data Processing for Plot
# Summarise one city's complaints by calendar month.
#
# df:         complaint records, one row per complaint
# date_col:   vector of dates from which each row's month is derived
# population: population used as the denominator of the per-10000 rate
# city_name:  label written to the City column
#
# Returns one row per month with the columns Month, `Number of Complaints`,
# `Number of Complaints per 10000 Population` and City.
city_mon_summary <-
  function(df, date_col, population, city_name) {
    df %>%
      group_by(Month = month(date_col, label = TRUE, abbr = TRUE)) %>%
      summarise(
        `Number of Complaints` = n(),
        `Number of Complaints per 10000 Population` =
          `Number of Complaints` / population * 10000,
        City = city_name
      )
  }

# Monthly summaries per city; the third argument is the hard-coded
# population used for the per-10000 rate.
chicago_months <- city_mon_summary(
  chicago_rodents_2018,
  mdy(chicago_rodents_2018$`Creation Date`),
  2716450,
  "Chicago"
)

boston_months <- city_mon_summary(
  boston_rodents_2018,
  boston_rodents_2018$open_dt,
  685094,
  "Boston"
)

dc_months <- city_mon_summary(
  dc_rodents_2018,
  dc_rodents_2018$ADDDATE,
  693972,
  "Washington DC"
)

nyc_months <- city_mon_summary(
  nyc_rodents_2018,
  mdy_hms(nyc_rodents_2018$`Created Date`),
  8622698,
  "New York City"
)

la_months <- city_mon_summary(
  la_rodents_2018,
  mdy_hms(la_rodents_2018$CreatedDate),
  3999759,
  "Los Angeles"
)

detroit_months <- city_mon_summary(
  detroit_rodents_2018,
  mdy_hms(detroit_rodents_2018$`Created At`),
  673104,
  "Detroit"
)

# Stack all cities into one long table for plotting.
city_months_18 <- bind_rows(
  chicago_months,
  boston_months,
  dc_months,
  nyc_months,
  la_months,
  detroit_months
)

# Plot
# Build a plot annotation that draws `text` in the given `color`.
#
# text:  label to draw
# color: text colour
# x, y:  data coordinates of the anchor; the label is left-aligned there
#        (hjust = 0) and drawn bold at font size 16
#
# Returns the layer produced by annotation_custom().
custom_tt_color <-
  function(text, color, x, y) {
    label_style <- gpar(col = color, fontsize = 16, fontface = "bold")
    label_grob <- textGrob(label = text, hjust = 0, gp = label_style)
    annotation_custom(
      grob = label_grob,
      xmin = x,
      xmax = x,
      ymin = y,
      ymax = y
    )
  }

# Line chart of monthly complaints per 10000 population, one line per city.
p1 <- city_months_18 %>%
  ggplot(aes(x = Month, 
             y = `Number of Complaints per 10000 Population`, 
         group = City,
         color = City)) +
  geom_point() +
  geom_line() + 
  # Draw the Chicago line a second time, thicker, so it stands out.
  geom_line(data = chicago_months,
            size = 1.1) +
  # Dotted black reference line at y = 0.
  geom_hline(
      yintercept = 0, 
            size = 0.6,
           color = "black",
        linetype = "dotted"
      ) +
  # The title is a single space; the visible headline is drawn by the
  # custom_tt_color() annotations at the end of this chain.
  labs(
           title = " ",
        subtitle = "Rat complaints per 10000 population by highly affected US cities in 2018",
         caption = "\nSource: City Open Data Portals"
    ) +
  # Unnamed values are assigned to the cities in the scale's own order
  # (presumably alphabetical, since City is a character column; confirm).
  scale_color_manual(values = c("#02a06e", # green
                                "#e11f27", # red
                                "#037dae", # blue
                                "#fcb11a", # orange
                                "#543092", # purple
                                "#763f02")) + # brown
  custom_theme +
  theme(     
             plot.title = element_text(hjust = 0.47),
          plot.subtitle = element_text(hjust = 0.47),
            legend.text = element_text( size = 13),
            axis.line.y = element_blank(),
      legend.background = element_rect(),
        legend.position = "top",
        legend.box.just = "top",
   legend.justification = "center"
     ) +
  guides(color = guide_legend(title = NULL)) +
  # Both labels share the same anchor and are left-aligned, so the red
  # "Chicago" is printed over the first word of the black headline.
  custom_tt_color("Chicago Has the Most Rat Complaints per 10000 Population All Year Round", "black", 1.3, 25.7) +
  custom_tt_color("Chicago", "#e11f27", 1.3, 25.7)

# Convert the plot to a gtable and switch off clipping for the panel, so the
# annotations are still drawn if they fall outside the panel area, then
# render it on a fresh grid page.
p1 <- ggplot_gtable(ggplot_build(p1))
p1$layout$clip[p1$layout$name == "panel"] <- "off"
grid.newpage()
grid.draw(p1)

Distribution of Sanitation Code Violations and Rat Complaints

Although the distribution of rat complaints is not equivalent to the actual distribution of rats, Chicago’s Lincoln Park Zoo has recently conducted research indicating that complaints can be highly reflective of the rodent infestation situation. The research also shows that the abundance of rats is potentially related to the abundance of garbage and other types of sanitation problems. That raises the question of where rat and garbage complaints are most frequent, and how much they overlap in different areas of Chicago. As the map below shows, sanitation code violations and rat complaints are connected.

# Data Processing for Plot
# Google API key: the X1 column of data/GoogleAuth.txt, parsed with "/" as
# delimiter.
GOOGLE_TOKEN <- read_delim(here("data", "GoogleAuth.txt"),
                           delim = "/",
                           col_names = FALSE)$X1

register_google(GOOGLE_TOKEN)

# NOTE(review): Stamen-hosted tiles have moved to Stadia Maps, and newer
# ggmap releases may no longer accept source = "stamen". Confirm against
# the installed ggmap version.
chicago <- get_map("Chicago",
                   maptype = "toner",
                   scale = 4,
                   source = "stamen")

# Locations with more than 6 rat complaints in 2018.
chicago_rodent_geo <-
  chicago_rodents_2018 %>%
  group_by(Longitude, Latitude) %>%
  summarise(`Complaint Count` = n()) %>%
  # summarise() only drops the last grouping level; drop the rest explicitly
  # so later steps work on a plain table.
  ungroup() %>%
  filter(`Complaint Count` > 6, !is.na(Longitude)) %>%
  mutate(`Complaint Type` = "Rat Complaints\n(>6 Times)")

# Locations with more than 3 sanitation code violations in 2018.
chicago_sanitation_geo <-
  chicago_sanitation_2018 %>%
  group_by(Longitude, Latitude) %>%
  summarise(`Complaint Count` = n()) %>%
  ungroup() %>%
  filter(`Complaint Count` > 3, !is.na(Longitude)) %>%
  mutate(`Complaint Type` = "Sanitation Code\nViolations\n(>3 Times)")

# One long table with both complaint types for the map layer.
chicago_geo <- bind_rows(chicago_rodent_geo,
                         chicago_sanitation_geo)

# Plot
# Base map of Chicago with one point per frequently reported location,
# coloured by complaint type and sized by complaint count.
ggmap(chicago) +
  geom_point(
    mapping = aes(
      x = `Longitude`,
      y = `Latitude`,
      color = `Complaint Type`,
      size = `Complaint Count`
    ),
    data = chicago_geo,
    alpha = 0.75
  ) +
  labs(
    title = "Rat Complaints Were Generally Located near\nPlaces with Sanitation Concerns (2018)",
    subtitle = "The north of Chicago has slightly more complaints than the south",
    caption = "Source: Chicago Data Portal",
    x = "Longitude",
    y = "Latitude"
  ) +
  # Fix the visible window of the map.
  scale_x_continuous(limits = c(-88.13, -87.23), expand = c(0, 0)) +
  scale_y_continuous(limits = c(41.58, 42.1), expand = c(0, 0)) +
  scale_color_manual(
    values = c(
      "#e11f27", # red
      "#037dae"  # blue
    )
  ) +
  custom_theme +
  theme(
    # Title, margin and legend placement
    plot.title = element_text(hjust = 0.6),
    plot.subtitle = element_text(hjust = 0.6),
    plot.margin = margin(t = 30, r = 42, l = 0, b = 30, unit = "pt"),
    legend.justification = "center",
    legend.title = element_text(size = 13, face = "bold"),
    legend.text = element_text(size = 12),
    legend.position = c(.8, .7),
    legend.background = element_rect(fill = "linen"),
    # A map needs no grid lines or axes
    panel.grid.major.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.line.x = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )

Rats and Empty Homes

Rats, being the ultimate squatters, see vacant houses as their prime real estate. Such places offer hassle-free shelter; nooks and crannies for hiding and playing; carpeting for nests; and toilet bowls for swimming. The best houses for rodents are those that have been vacant for a while or haven’t yet been fixed up, as these often provide a convenient water supply via leaks or other accumulated moisture. Vacant properties that are foreclosed generally entail high risks, and once a single unit has been infested, it is only a matter of time before rats venture out into the neighborhood in search of food and new places to inhabit. The plot below shows that communities with more vacant properties are more likely to attract rats.

# Data Processing for Plot
# Count complaints per community and attach the vacant-property figures.
#
# data: complaint records with a `Community Area` column
#
# Returns one row per community that appears in data, community_numbers and
# chicago_vacant_property, holding Community, `Number of Complaints` and the
# columns of chicago_vacant_property.
property_rat <-
  function(data) {
    data %>%
      group_by(`Community Area`) %>%
      summarise(`Number of Complaints` = n()) %>%
      inner_join(community_numbers, by = c("Community Area" = "Number")) %>%
      select(Community, `Number of Complaints`) %>%
      inner_join(chicago_vacant_property, by = "Community")
  }

# One complaints-versus-vacancy table per year, then stacked so every
# community contributes one point per year.
chicago_rat_property_14 <- property_rat(chicago_rodents_2014)
chicago_rat_property_15 <- property_rat(chicago_rodents_2015)
chicago_rat_property_16 <- property_rat(chicago_rodents_2016)
chicago_rat_property_17 <- property_rat(chicago_rodents_2017)
chicago_rat_property_18 <- property_rat(chicago_rodents_2018)

chicago_rat_property <- bind_rows(
  chicago_rat_property_14,
  chicago_rat_property_15,
  chicago_rat_property_16,
  chicago_rat_property_17,
  chicago_rat_property_18
)

# Plot
# Hexagonal heat map of vacant housing units against complaint counts.
chicago_rat_property %>%
  ggplot(
    aes(
      x = `Vacant Housing Units`,
      y = `Number of Complaints`
    )
  ) +
  geom_hex() +
  xlim(0, 3850) +
  labs(
    title = "Communities with More Vacant Properties Are More Likely to\nSee Rodent Complaints in Chicago (2014 to 2018)",
    subtitle = "Perfect insulation in the walls for nesting may be what is attracting rodents",
    caption = "Source: American Community Survey & Chicago Data Portal",
    x = "Number of Vacant Housing Units"
  ) +
  scale_fill_continuous("Community\nCount", low = "orange", high = "firebrick") +
  custom_theme +
  theme(
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 1.29),
    axis.line.y = element_blank()
  )

How Has Response Time Changed Over Years?

The city has become increasingly responsive to rat complaints. In 2016, the mayor’s stated goal was for city workers to respond to all relevant complaints within 5 days, and it now seems that this goal has largely been achieved. The bar chart below shows how the number of days taken to resolve complaints has decreased, month by month, across the years. Starting from 2016, response time has dropped drastically and the total number of rat complaints has also started to go down.

# Plot
# Stacked monthly bars of completed complaints, split by how many days each
# took to resolve, with one facet row per year.
chicago_rodents %>%
  filter(Status == "Completed") %>%
  mutate(
    # Whole days between creation and completion.
    Resolving_Days =
      as.numeric(mdy(`Completion Date`)) - as.numeric(mdy(`Creation Date`)),
    # Collapse the individual day counts into five bands.
    `Days to Resolve\nRat Complaints` = fct_collapse(
      factor(Resolving_Days),
      "within 3 days" = as.character(0:3),
      "4 to 7 days" = as.character(4:7),
      "8 to 14 days" = as.character(8:14),
      "15 to 30 days" = as.character(15:30),
      "over 30 days" = as.character(31:500)
    ),
    year = year(mdy(`Creation Date`)),
    month = month(mdy(`Creation Date`), label = TRUE, abbr = TRUE)
  ) %>%
  group_by(year, month) %>%
  ggplot() +
  geom_bar(
    aes(x = month, fill = `Days to Resolve\nRat Complaints`),
    position = position_stack(reverse = TRUE)
  ) +
  facet_wrap(vars(year), nrow = 5) +
  labs(
    title = "Rat Complaint Response Time Has Dropped over\nthe Past Few Years in Chicago (2014 to 2018)",
    subtitle = "Since 2017, most rat complaints are resolved within 7 days",
    caption = "Source: Chicago Data Portal",
    x = "Month",
    y = "Number of Resolved Complaints"
  ) +
  scale_fill_manual(
    values = c(
      "#02a06e", # green
      "#037dae", # blue
      "#fcb11a", # orange
      "#763f02", # brown
      "#e11f27"  # red
    ),
    "Days to Resolve\nRat Complaints"
  ) +
  custom_theme +
  theme(
    plot.title = element_text(hjust = 0.55),
    plot.subtitle = element_text(hjust = 0.55),
    plot.caption = element_text(hjust = 1.27),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    strip.text = element_text(face = "bold", size = 12),
    strip.background = element_blank(),
    axis.line.y = element_blank()
  )

Response Time Reduced for All Communities

From 2014 to 2018, Chicago’s overall response time to rat complaints has been steadily decreasing in all seasons. Besides seasonality analysis, looking at response time changes at the community level may also help policymakers target potential rat issues more effectively in the future. The set of maps below shows that slow response was most severe in 2015, especially in the west of Chicago, and that shortly afterwards almost all Chicago communities became much more responsive to rat complaints.

# Data Processing and Mapping Preparation
# Minimal theme for the map panels: linen backgrounds, no grid, no border.
cowplot_theme <- theme(
  panel.background = element_rect(fill = "linen"),
  plot.background = element_rect(fill = "linen"),
  legend.background = element_rect(fill = "linen"),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.border = element_blank()
)

# Average resolution time per community area for one set of complaints.
#
# year_data: complaint records with Status, `Completion Date`,
#            `Creation Date` and `Community Area` columns
#
# Returns one row per community area with `Community Area` (as character),
# avg_resolving_days (mean days to resolve, rounded up) and
# `Days to Resolve Rat Complaints` (that average collapsed into bands).
# Rows without a community area are dropped.
create_response <-
  function(year_data) {
    year_data %>%
      filter(Status == "Completed") %>%
      mutate(
        Resolving_Days =
          as.numeric(mdy(`Completion Date`)) - as.numeric(mdy(`Creation Date`))
      ) %>%
      group_by(`Community Area`) %>%
      summarise(avg_resolving_days = ceiling(mean(Resolving_Days))) %>%
      mutate(
        `Community Area` = as.character(`Community Area`),
        `Days to Resolve Rat Complaints` = fct_collapse(
          factor(avg_resolving_days),
          "within 3 days" = as.character(0:3),
          "4 to 7 days" = as.character(4:7),
          "8 to 14 days" = as.character(8:14),
          "15 to 30 days" = as.character(15:30),
          "over 30 days" = as.character(31:500)
        )
      ) %>%
      filter(!is.na(`Community Area`))
  }

# Average resolution time per community, one table per year.
response_2014 <- chicago_rodents_2014 %>% create_response()
response_2015 <- chicago_rodents_2015 %>% create_response()
response_2016 <- chicago_rodents_2016 %>% create_response()
response_2017 <- chicago_rodents_2017 %>% create_response()
response_2018 <- chicago_rodents_2018 %>% create_response()

# Shapefile that is joined to the yearly tables on its area_num_1 column.
chicago_shp <- read_sf(
  here("data", "geo_export_b4a17cee-2ce2-4bde-9c91-82d509e2a7c9.shp")
)

# Draw one year's choropleth of average complaint-resolution time.
#
# year_response: output of create_response() for one year
# year:          the year being drawn. Kept so existing calls keep working;
#                the palette no longer depends on it.
#
# Returns a ggplot object without a legend.
#
# Colours are looked up by category name instead of by position. The earlier
# per-year positional palettes only matched the legend when each year
# contained exactly the expected categories, and no palette was defined for
# years outside 2014-2018.
create_res_plot <-
  function(year_response, year) {
    response_palette <- c(
      "within 3 days" = "#1a9641", # 1
      "4 to 7 days"   = "#a6d96a", # 2
      "8 to 14 days"  = "#ffffbf", # 3
      "15 to 30 days" = "#fdae61", # 4
      "over 30 days"  = "#d7191c"  # 5
    )

    # Keep every shape, including communities without data for the year.
    year_map <-
      chicago_shp %>%
      left_join(year_response, by = c("area_num_1" = "Community Area"))

    ggplot(year_map) +
      geom_sf(aes(fill = `Days to Resolve Rat Complaints`)) +
      coord_sf(datum = NA) +
      background_grid(minor = "none") +
      scale_fill_manual(values = response_palette) +
      cowplot_theme +
      theme(legend.position = "none")
  }

# Plot
# One map per year.
map_14 <- create_res_plot(response_2014, 2014)
map_15 <- create_res_plot(response_2015, 2015)
map_16 <- create_res_plot(response_2016, 2016)
map_17 <- create_res_plot(response_2017, 2017)
map_18 <- create_res_plot(response_2018, 2018)

# Build a throw-away data set used only to produce a shared legend.
# The category column is turned into plain character so it can be overwritten.
legend_response <-
  response_2014 %>%
  mutate(`Days to Resolve Rat Complaints` = as.character(`Days to Resolve Rat Complaints`))
  
# Overwrite column 3 (`Days to Resolve Rat Complaints`) in rows 1 and 3 with
# the two fastest categories, presumably because they do not occur in the
# 2014 data and would otherwise be missing from the legend (confirm).
legend_response[[1,3]] <- "within 3 days"
legend_response[[3,3]] <- "4 to 7 days"

# This map is never shown itself; only its legend is extracted below.
map_legend <-
  chicago_shp %>%
  left_join(legend_response, by = c("area_num_1" = "Community Area")) %>%
  ggplot() +
  geom_sf(aes(fill = `Days to Resolve Rat Complaints`)) +
  coord_sf(datum = NA) +
  cowplot_theme +
  theme(legend.text = element_text(size = 13),
       legend.title = element_text(size = 14, 
                                  face = "bold")) +
  # Values are unnamed, so they are matched to the categories by position;
  # the numbered comments give the intended band for each colour
  # (1 = fastest, 5 = slowest). `breaks` sets the order of the legend keys.
  # NOTE(review): the position matching relies on the sort order of the
  # character categories; verify after any change to the labels.
  scale_fill_manual(values = c("#fdae61", # 4
                               "#a6d96a", # 2
                               "#ffffbf", # 3
                               "#d7191c", # 5
                               "#1a9641"  # 1
                               ),
                    breaks = c("within 3 days",
                               "4 to 7 days",
                               "8 to 14 days",
                               "15 to 30 days",
                               "over 30 days")) +
   background_grid(major = "none", minor = "none")

legend <- get_legend(map_legend)

# Arrange the five maps and the legend in a grid with three columns; the
# legend takes the fourth cell and gets a blank label.
maps <-
  plot_grid(map_14, 
            map_15, 
            map_16,
            legend,
            map_17,
            map_18,
            labels = c("2014", "2015", "2016", " ", "2017", "2018"), 
            ncol = 3) +
  background_grid(major = "none", minor = "none") +
  cowplot_theme +
  theme(plot.background = element_rect(color = "linen", size = 2))

# Convert the grid to a ggplot object so that labs() and theme() can add the
# title, subtitle and caption.
maps <- as.ggplot(maps)
maps +
  labs(title = "All Chicago Communities Have Reduced Average\nResponse Time to Rat Complaints Over Years",
    subtitle = "Response time to rat complaints peaked at 2015 especially around\ncommunities in the west, and then dropped sharply",
     caption = "Source: Chicago Data Portal") +
  custom_theme +
  # Remove all axis decoration and draw the axis lines in the background
  # colour so they are not visible.
  theme(
                axis.title.x = element_blank(),
                 axis.text.x = element_blank(),
                axis.ticks.x = element_blank(),
                axis.title.y = element_blank(),
                axis.text.y  = element_blank(),
                axis.ticks.y = element_blank(),
                 axis.line.x = element_line(color = "linen", size = 1),
                 axis.line.y = element_line(color = "linen", size = 1),
          axis.line.x.bottom = element_line(color = "linen", size = 1),
            axis.line.y.left = element_line(color = "linen", size = 1),
             plot.background = element_rect( fill = "linen"),
                  plot.title = element_text(hjust = 0.5),
               plot.subtitle = element_text(hjust = 0.5)
    )


Chicago Rats Be Gone

Created by Guan He